## Activism NLP ##

# msgs <- read.table("Messages_2010_2018_v2.txt", header=TRUE, sep="_del_mn_")
# Load the raw export, one element per line of the file (the first element is
# the header line; later code drops it with [-1]).
msgs <- readLines("../data/Messages_2010_2018_v2.txt")


# Trailing field: everything after the last quote-tab-quote separator on the
# line, with any remaining double quotes removed.
approach <- gsub('\"', '', sub('.*\"\t\"', '', msgs))

# Everything before the first quote-tab-quote separator: the tab-delimited
# metadata followed by the quoted message text.
other_dta <- sub('\"\t\".*', '', msgs)

# Message text: whatever follows the last tab-quote in that leading part,
# again with double quotes removed.
tweet_txt <- gsub('\"', '', sub('.*\t\"', '', other_dta))


# Metadata only: the leading part cut off at its first tab-quote.
otro_dta <- sub('\t\".*', '', other_dta)

# Build the per-message metadata table from the tab-delimited part of each
# line (header line dropped).
meta_names <- c("permno", "date", "itrade", "dist_to_file",
                "message_id", "overnight", "userid")
meta_fields <- strsplit(otro_dta[-1], "\t")

# Force every row to exactly length(meta_names) fields. strsplit() drops a
# trailing empty field, and rbind() would otherwise silently recycle a short
# row (filling the missing value with the first field). Indexing past the end
# pads with NA instead; extra fields are discarded.
meta_fields <- lapply(meta_fields, `[`, seq_along(meta_names))

dat <- do.call(rbind, meta_fields)
# stringsAsFactors = FALSE keeps the columns as character on R < 4.0 too;
# with factors, as.numeric() below would return level codes, not the values.
dat <- as.data.frame(dat, stringsAsFactors = FALSE)
names(dat) <- meta_names

# Every column except the date is numeric.
num_cols <- setdiff(meta_names, "date")
dat[num_cols] <- lapply(dat[num_cols], as.numeric)

# Dates are written like 01Jan2010; %b matching depends on the locale.
dat$date <- as.Date(dat$date, format="%d%b%Y")

library("data.table")
# Promote the metadata to a data.table and attach the two text fields. Their
# first element is dropped, matching the [-1] used when the metadata rows were
# built, and they are attached before the table is re-sorted by its key.
dat <- as.data.table(dat)
set(dat, j = "message_body", value = tweet_txt[-1])
set(dat, j = "approach", value = approach[-1])

# Sort and key by event day, calendar date, overnight flag and approach.
setkeyv(dat, c("dist_to_file", "date", "overnight", "approach"))

# Constant 1 per message, so sum(ct) counts messages in the aggregations.
set(dat, j = "ct", value = 1)

# Number of messages per event day. The count column is named with `=`;
# using `<-` inside list() leaves the column unnamed, so data.table calls it V1.
dat_ag <- dat[, list(num_msg = sum(ct)), by = 'dist_to_file']

## Code to plot to see if the read worked ##
#plot(num_msg~dist_to_file, data=dat_ag, type="l", bty="n",las=1,
#     ylab="Number of StockTwits posts", xlab="Event Time (t=0 filing date)", ylim=c(0, 3100))
#abline(v=-60, lty="dotted", col="steelblue3")
#abline(v=0, lty="dotted", col="steelblue3")

#dat$activism<- grepl("activist", dat$message_body) | 
#  grepl("activism", dat$message_body) |
#  grepl("Activist", dat$message_body) |
#  grepl("Activism", dat$message_body) |
#  grepl("Ackman ", dat$message_body) |
#  grepl("Ichan ", dat$message_body) |
#  grepl("Peltz ", dat$message_body) |
#  grepl("ackman ", dat$message_body) |
#  grepl("ichan ", dat$message_body) |
#  grepl("Loeb ", dat$message_body) |
#  grepl("loeb ", dat$message_body) |
#  grepl("Pickens", dat$message_body) |
#  grepl("pickens", dat$message_body) |
#  grepl("burkle", dat$message_body) |
#  grepl("Burkle", dat$message_body) |
#  grepl("Pratte", dat$message_body) |
#  grepl("Soros ", dat$message_body) |
#  grepl("soros ", dat$message_body)
 
# Keyword flags: each column is TRUE when the message body matches at least
# one of the listed patterns (case-sensitive regular expressions).

# Mentions of activism / activists.
dat$activism <- Reduce(
  `|`,
  lapply(c("activist", "activism", "Activist", "Activism"),
         grepl, x = dat$message_body)
)


# Volatility / trading-volume vocabulary ("vol " keeps its trailing space).
dat$vol <- Reduce(
  `|`,
  lapply(c("volatility", "vol ", "volatile", "volume", "turnover"),
         grepl, x = dat$message_body)
)

# Liquidity vocabulary.
dat$liq <- Reduce(
  `|`,
  lapply(c("liquidity", "spread"),
         grepl, x = dat$message_body)
)


# Price / chart vocabulary.
dat$ret <- Reduce(
  `|`,
  lapply(c("trend", "chart", "price"),
         grepl, x = dat$message_body)
)
  


## Within-day vs. overnight message counts by event day ##
## Note the spike in overnight is only after the announcement day ##
## Not included in the paper ##

# Messages per (event day, overnight flag). The count column is named with
# `=`; `<-` inside list() would leave it auto-named V1.
dat_ag <- dat[, list(num_msg = sum(ct)), by = 'dist_to_file,overnight']

# Within-day series sets up the axes. The y-axis keeps the 0-2500 range but is
# extended when either series is taller, so the overnight line added below is
# never clipped.
plot(num_msg ~ dist_to_file, data = dat_ag[dat_ag$overnight == 0, ],
     type = "l", bty = "n", las = 1,
     ylab = "Number of StockTwits posts", xlab = "Event Time (t=0 filing date)",
     ylim = c(0, max(2500, dat_ag$num_msg, na.rm = TRUE)))
# Reference lines at t = -60 and at the filing date (t = 0).
abline(v = -60, lty = "dotted", col = "steelblue3")
abline(v = 0, lty = "dotted", col = "steelblue3")

# Overnight series on the same axes. lines() cannot change labels or limits,
# so only the line style is passed.
lines(num_msg ~ dist_to_file, data = dat_ag[dat_ag$overnight == 1, ],
      lty = "dashed", col = "blue")


legend("topleft", legend = c("within day", "overnight"),
       col = c("black", "blue"), lty = c("solid", "dashed"), bty = "n")


## --------------------------- ##

## Figure 1A: percentage of messages mentioning activism, by event day ##


# Per-event-day counts over all messages: total plus one count per keyword flag.
dat_ag <- dat[, .(num_msg = sum(ct),
                  act_msg = sum(activism),
                  vol_msg = sum(vol),
                  liq_msg = sum(liq),
                  ret_msg = sum(ret)),
              by = 'dist_to_file']


# Same counts restricted to messages whose approach field is "Technical".
dat_tech <- dat[approach == "Technical"]


dat_ag_t <- dat_tech[, .(num_msg = sum(ct),
                         act_msg = sum(activism),
                         vol_msg = sum(vol),
                         liq_msg = sum(liq),
                         ret_msg = sum(ret)),
                     by = 'dist_to_file']



# All messages (solid black) define the axes; technical users are overlaid
# (dashed blue).
plot(I(100 * act_msg / num_msg) ~ dist_to_file, data = dat_ag,
     type = "l", bty = "n", las = 1,
     ylab = "Pct of tweets mentioning 'Activist'",
     xlab = "Event Time (t=0 filing date)")
lines(I(100 * act_msg / num_msg) ~ dist_to_file, data = dat_ag_t,
      type = "l", bty = "n", las = 1, col = "blue", lty = "dashed")
# Reference lines at t = -60 and at the filing date (t = 0).
abline(v = -60, lty = "dotted", col = "steelblue3")
abline(v = 0, lty = "dotted", col = "steelblue3")


legend("topleft", legend = c("all tweets", "tweets by technical users"),
       col = c("black", "blue"), lty = c("solid", "dashed"), bty = "n")

